#Installing Packages

#VIEWING DATA

# Print the working directory; the relative paths used below resolve against it.
getwd()
## [1] "/Users/elragaby/Desktop/Data Science"
# List the files in the working directory (NY2024.csv should be among them).
list.files()
##  [1] "coor"                            "Data Science.Rproj"             
##  [3] "Heart2022.csv"                   "HeartRData.R"                   
##  [5] "NBAPlayersData.csv"              "neighborhood_boundaries.geojson"
##  [7] "NY Airbnb Listings.Rmd"          "NY-Airbnb-Listings.Rmd"         
##  [9] "NY.R"                            "NY2024.csv"                     
## [11] "USACOLLEGES.csv"
# Load the listings CSV (comma-separated, first row holds the column names).
# NOTE(review): `id` is parsed as a double and some ids are around 9.7e17,
# beyond the range where doubles hold integers exactly -- consider reading it
# as character if exact ids are ever needed.
NYData <- read.csv(file = "NY2024.csv", header = TRUE, sep = ",")
# Preview the first six rows.
head(NYData, n = 6L)
##             id                                                          name
## 1 1.312228e+06                    Rental unit in Brooklyn · ★5.0 · 1 bedroom
## 2 4.527754e+07 Rental unit in New York · ★4.67 · 2 bedrooms · 1 bed · 1 bath
## 3 9.713540e+17 Rental unit in New York · ★4.17 · 1 bedroom · 2 beds · 1 bath
## 4 3.857863e+06  Rental unit in New York · ★4.64 · 1 bedroom · 1 private bath
## 5 4.089661e+07           Condo in New York · ★4.91 · Studio · 1 bed · 1 bath
## 6 4.958498e+07           Rental unit in New York · ★5.0 · 1 bedroom · 1 bath
##     host_id          host_name   borough       neighborhood latitude longitude
## 1   7130382             Walter  Brooklyn       Clinton Hill 40.68371 -73.96461
## 2  51501835           Jeniffer Manhattan     Hell's Kitchen 40.76661 -73.98810
## 3 528871354             Joshua Manhattan            Chelsea 40.75076 -73.99461
## 4  19902271 John And Catherine Manhattan Washington Heights 40.83560 -73.94250
## 5  61391963     Stay With Vibe Manhattan        Murray Hill 40.75112 -73.97860
## 6  51501835           Jeniffer Manhattan     Hell's Kitchen 40.75995 -73.99296
##         room_type price minimum_nights number_of_reviews last_review
## 1    Private room    55             30                 3    12/20/15
## 2 Entire home/apt   144             30                 9      5/1/23
## 3 Entire home/apt   187              2                 6    12/18/23
## 4    Private room   120             30               156     9/17/23
## 5 Entire home/apt    85             30                11     12/3/23
## 6 Entire home/apt   115             30                 5     7/29/23
##   reviews_per_month calculated_host_listings_count availability_365
## 1              0.03                              1                0
## 2              0.24                            139              364
## 3              1.67                              1              343
## 4              1.38                              2              363
## 5              0.24                            133              335
## 6              0.16                            139              276
##   number_of_reviews_ltm    license rating bedrooms beds         baths
## 1                     0 No License      5        1    1 Not specified
## 2                     2 No License   4.67        2    1             1
## 3                     6     Exempt   4.17        1    2             1
## 4                    12 No License   4.64        1    1             1
## 5                     3 No License   4.91   Studio    1             1
## 6                     2 No License      5        1    1             1

#EXPLORING DATA

## [1] 20758
## 'data.frame':    20758 obs. of  22 variables:
##  $ id                            : num  1.31e+06 4.53e+07 9.71e+17 3.86e+06 4.09e+07 ...
##  $ name                          : chr  "Rental unit in Brooklyn · ★5.0 · 1 bedroom" "Rental unit in New York · ★4.67 · 2 bedrooms · 1 bed · 1 bath" "Rental unit in New York · ★4.17 · 1 bedroom · 2 beds · 1 bath" "Rental unit in New York · ★4.64 · 1 bedroom · 1 private bath" ...
##  $ host_id                       : int  7130382 51501835 528871354 19902271 61391963 51501835 51501835 51501835 2526182 14251313 ...
##  $ host_name                     : chr  "Walter" "Jeniffer" "Joshua" "John And Catherine" ...
##  $ borough                       : chr  "Brooklyn" "Manhattan" "Manhattan" "Manhattan" ...
##  $ neighborhood                  : chr  "Clinton Hill" "Hell's Kitchen" "Chelsea" "Washington Heights" ...
##  $ latitude                      : num  40.7 40.8 40.8 40.8 40.8 ...
##  $ longitude                     : num  -74 -74 -74 -73.9 -74 ...
##  $ room_type                     : chr  "Private room" "Entire home/apt" "Entire home/apt" "Private room" ...
##  $ price                         : int  55 144 187 120 85 115 105 130 90 292 ...
##  $ minimum_nights                : int  30 30 2 30 30 30 30 30 30 30 ...
##  $ number_of_reviews             : int  3 9 6 156 11 5 3 10 19 12 ...
##  $ last_review                   : chr  "12/20/15" "5/1/23" "12/18/23" "9/17/23" ...
##  $ reviews_per_month             : num  0.03 0.24 1.67 1.38 0.24 0.16 0.1 0.26 0.24 1.71 ...
##  $ calculated_host_listings_count: int  1 139 1 2 133 139 139 139 2 1 ...
##  $ availability_365              : int  0 364 343 363 335 276 364 295 5 365 ...
##  $ number_of_reviews_ltm         : int  0 2 6 12 3 2 0 2 2 12 ...
##  $ license                       : chr  "No License" "No License" "Exempt" "No License" ...
##  $ rating                        : chr  "5" "4.67" "4.17" "4.64" ...
##  $ bedrooms                      : chr  "1" "2" "1" "1" ...
##  $ beds                          : int  1 1 2 1 1 1 1 2 1 1 ...
##  $ baths                         : chr  "Not specified" "1" "1" "1" ...
##                             id                           name 
##                              0                              0 
##                        host_id                      host_name 
##                              0                              0 
##                        borough                   neighborhood 
##                              0                              0 
##                       latitude                      longitude 
##                              0                              0 
##                      room_type                          price 
##                              0                              0 
##                 minimum_nights              number_of_reviews 
##                              0                              0 
##                    last_review              reviews_per_month 
##                              0                              0 
## calculated_host_listings_count               availability_365 
##                              0                              0 
##          number_of_reviews_ltm                        license 
##                              0                              0 
##                         rating                       bedrooms 
##                              0                              0 
##                           beds                          baths 
##                              0                              0
##                                                            name   host_id
## 1                    Rental unit in Brooklyn · ★5.0 · 1 bedroom   7130382
## 2 Rental unit in New York · ★4.67 · 2 bedrooms · 1 bed · 1 bath  51501835
## 3 Rental unit in New York · ★4.17 · 1 bedroom · 2 beds · 1 bath 528871354
##     borough   neighborhood latitude longitude       room_type price
## 1  Brooklyn   Clinton Hill 40.68371 -73.96461    Private room    55
## 2 Manhattan Hell's Kitchen 40.76661 -73.98810 Entire home/apt   144
## 3 Manhattan        Chelsea 40.75076 -73.99461 Entire home/apt   187
##   minimum_nights number_of_reviews last_review reviews_per_month
## 1             30                 3    12/20/15              0.03
## 2             30                 9      5/1/23              0.24
## 3              2                 6    12/18/23              1.67
##   calculated_host_listings_count availability_365 number_of_reviews_ltm
## 1                              1                0                     0
## 2                            139              364                     2
## 3                              1              343                     6
##      license rating bedrooms beds         baths
## 1 No License      5        1    1 Not specified
## 2 No License   4.67        2    1             1
## 3     Exempt   4.17        1    2             1
## NULL
## NULL
## [1] "Private room"    "Entire home/apt" "Hotel room"      "Shared room"
##      Host_ID Listings
## 1  162280872      146
## 2   51501835      129
## 3   61391963      113
## 4  107434423      107
## 5   19303369       87
## 6  137358866       81
## 7  219517861       81
## 8  200239515       70
## 9  120762452       69
## 10 204704622       62

#Visualizing Data

#Hosts with the most listings in NYC

# ggplot2 is used from here on but is not attached anywhere earlier in the
# visible script; load it before first use, as done for sf/leaflet further down.
library(ggplot2)

# `top_host_df` (columns Host_ID and Listings) is not created anywhere in the
# visible script. Keep it if an earlier step already built it; otherwise derive
# the ten most frequent host ids from NYData.
# NOTE(review): assumes Listings is the number of rows per host_id -- confirm.
if (!exists("top_host_df")) {
  host_counts <- head(sort(table(NYData$host_id), decreasing = TRUE), 10)
  top_host_df <- data.frame(
    Host_ID = names(host_counts),
    Listings = as.vector(host_counts)
  )
}

# Bar chart with the tallest bar first; the fill gradient also encodes the
# listing count. geom_col() plots the Listings values as given.
viz_1 <- ggplot(
  top_host_df,
  aes(x = reorder(Host_ID, -Listings), y = Listings, fill = Listings)
) +
  geom_col() +
  labs(
    title = "Hosts with the most listings in NYC",
    x = "Host IDs",
    y = "Count of listings"
  ) +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  # Host ids are long; angle them so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(viz_1)

#Density and Distribution of Prices for Each Borough (violin plot)

# Violin plot of nightly price per borough.
# Listings priced at 500 or more are dropped before plotting.
sub_6 <- subset(NYData, price < 500)

# The grouping variable is `borough`, so the title and axis are labelled
# "Borough" (the data has a separate `neighborhood` column).
viz_2 <- ggplot(sub_6, aes(x = borough, y = price, fill = borough)) +
  geom_violin() +
  labs(
    title = "Density and Distribution of Prices for Each Borough",
    x = "Borough",
    y = "Price Per Night"
  ) +
  theme(
    # The fill colour repeats the x axis, so the legend is hidden.
    legend.position = "none",
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )
print(viz_2)

#Distribution of Prices for Each Neighbourhood (box plot)

# Box plot of nightly price for the ten neighbourhoods with the most listings.
# The ranking and the grouping use the `neighborhood` column: ranking `borough`
# and keeping the top 10 would not filter anything if the data holds only the
# five NYC boroughs.
# NOTE(review): assumes the top-10 cut was meant for neighbourhoods -- confirm.
top_neighbourhoods <- head(
  names(sort(table(NYData$neighborhood), decreasing = TRUE)),
  10
)

# Keep listings under 500 per night that fall in those neighbourhoods.
sub_6 <- subset(NYData, price < 500 & neighborhood %in% top_neighbourhoods)

viz_boxplot <- ggplot(
  sub_6,
  aes(x = neighborhood, y = price, fill = neighborhood)
) +
  geom_boxplot() +
  labs(
    title = "Distribution of Prices for Each Neighbourhood",
    x = "Neighbourhood",
    y = "Price Per Night"
  ) +
  theme(
    legend.position = "none",
    # Ten neighbourhood names do not fit horizontally.
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )

print(viz_boxplot)

#Visualizing the distribution of room types
# Set the repr.plot.* size options to 12 x 6.
# NOTE(review): presumably only read by notebook front ends -- confirm.
options(repr.plot.height = 6, repr.plot.width = 12)

# One fill colour per room type, in factor-level order.
custom_colors <- c("blue", "green", "orange", "red")

# Turn room_type into a factor whose levels follow first appearance in the data.
NYData$room_type <- with(NYData, factor(room_type, levels = unique(room_type)))

# Side-by-side bars: number of listings per borough, split by room type.
viz_grouped_bar <- ggplot(
  data = NYData,
  mapping = aes(x = borough, fill = room_type)
) +
  geom_bar(width = 0.7, position = "dodge") +
  scale_fill_manual(values = custom_colors) +
  ggtitle("Distribution of Room Types per Neighborhood Group") +
  xlab("Neighborhood Group") +
  ylab("Number of Listings") +
  theme_minimal()

print(viz_grouped_bar)

#Creating a map of New York and its neighborhoods

library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(leaflet)
library(leaflet.extras)

# Read the neighbourhood boundary layer from the "coor" data source.
coor <- st_read(dsn = "coor")
## Reading layer `coor' from data source `/Users/elragaby/Desktop/Data Science/coor' using driver `GeoJSON'
## Simple feature collection with 310 features and 4 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84
# Inspect the columns and geometry of what was read.
str(object = coor)
## Classes 'sf' and 'data.frame':   310 obs. of  5 variables:
##  $ neighborhood: chr  "Allerton" "Alley Pond Park" "Arden Heights" "Arlington" ...
##  $ boroughCode : chr  "2" "4" "5" "5" ...
##  $ borough     : chr  "Bronx" "Queens" "Staten Island" "Staten Island" ...
##  $ X.id        : chr  "http://nyc.pediacities.com/Resource/Neighborhood/Allerton" "http://nyc.pediacities.com/Resource/Neighborhood/Alley_Pond_Park" "http://nyc.pediacities.com/Resource/Neighborhood/Arden_Heights" "http://nyc.pediacities.com/Resource/Neighborhood/Arlington" ...
##  $ geometry    :sfc_POLYGON of length 310; first list element: List of 1
##   ..$ : num [1:16, 1:2] -73.8 -73.8 -73.9 -73.9 -73.9 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
##   ..- attr(*, "names")= chr [1:4] "neighborhood" "boroughCode" "borough" "X.id"
# Choropleth of the average nightly price per neighbourhood.
ny_airbnb <- read.csv("NY2024.csv")

# Mean price per neighbourhood, as a two-column data frame.
feq <- aggregate(ny_airbnb$price, by = list(ny_airbnb$neighborhood), FUN = mean)
names(feq) <- c("neighborhood", "average_price")

# Left join onto the boundaries: every polygon is kept, and polygons with no
# matching neighbourhood in the listings get average_price = NA.
coor <- merge(coor, feq, by = "neighborhood", all.x = TRUE)

# Round 'average_price' to the nearest integer for display.
coor$average_price <- round(coor$average_price)

# domain = NULL: the palette takes its range from the values it is called with
# (here the same average_price vector for both the polygons and the legend).
color_scale <- colorNumeric(palette = "Reds", domain = NULL)

map3 <- leaflet(coor) %>%
  setView(lng = -74.00, lat = 40.71, zoom = 10) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~color_scale(average_price),
    weight = 1,
    color = "black",
    fillOpacity = 0.5,
    # Argument spelled out in full instead of relying on partial matching of
    # `highlight`. The hover style is one style shared by all polygons, so the
    # per-feature fill colour is left out: a hovered polygon keeps its own
    # price colour and only gets a thicker outline and a more opaque fill.
    highlightOptions = highlightOptions(
      weight = 3,
      fillOpacity = 0.8
    )
    #label = ~paste(neighborhood, ': '," $",average_price, "/ night")
  ) %>%
  addLegend(
    position = "bottomright",
    pal = color_scale,
    values = coor$average_price,
    title = "Average Price",
    opacity = 0.7
  )

map3
#Creating a map with all the AirBnB listings

library(sf)
library(leaflet)
library(leaflet.extras)

# Creating a simple spatial object
# Point layer built from the listing coordinates (EPSG:4326 longitude/latitude).
NYData_sf <- st_as_sf(NYData, coords = c("longitude", "latitude"), crs = 4326)

# domain = NULL: the colour range is taken from the prices passed in.
colors_red <- colorNumeric(palette = "Reds", domain = NULL)

# Clustered map of every listing.
# Circle markers are used because the styling below (fill colour by price,
# radius, outline weight and colour) is circle/path styling; passed through
# markerOptions() to addMarkers() it had no visible effect on the icon markers.
map <- leaflet(NYData_sf) %>%
  addProviderTiles("CartoDB.Voyager") %>%
  addCircleMarkers(
    clusterOptions = markerClusterOptions(),
    group = "listings",
    fillColor = ~colors_red(price),
    fillOpacity = 0.75,
    weight = 2,
    color = "black",
    radius = 5,
    # HTML popup with the key facts of the clicked listing.
    popup = ~paste(
      "<strong>Price:</strong>", price, "<br>",
      "<strong>Room Type:</strong>", room_type, "<br>",
      "<strong>Minimum Nights:</strong>", minimum_nights, "<br>",
      "<strong>Bedrooms:</strong>", bedrooms, "<br>",
      "<strong>Beds:</strong>", beds, "<br>",
      "<strong>Baths:</strong>", baths
    )
  ) %>%
  addMiniMap(toggleDisplay = TRUE)

map
#Combining Map 1 & 2
library(sf)
library(leaflet)
library(leaflet.extras)

# Read the boundary layer again; this replaces the `coor` object that the
# previous section merged average prices into with a fresh copy.
coor <- st_read(dsn = "coor")
## Reading layer `coor' from data source `/Users/elragaby/Desktop/Data Science/coor' using driver `GeoJSON'
## Simple feature collection with 310 features and 4 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS 84
# Inspect the columns and geometry of what was read.
str(object = coor)
## Classes 'sf' and 'data.frame':   310 obs. of  5 variables:
##  $ neighborhood: chr  "Allerton" "Alley Pond Park" "Arden Heights" "Arlington" ...
##  $ boroughCode : chr  "2" "4" "5" "5" ...
##  $ borough     : chr  "Bronx" "Queens" "Staten Island" "Staten Island" ...
##  $ X.id        : chr  "http://nyc.pediacities.com/Resource/Neighborhood/Allerton" "http://nyc.pediacities.com/Resource/Neighborhood/Alley_Pond_Park" "http://nyc.pediacities.com/Resource/Neighborhood/Arden_Heights" "http://nyc.pediacities.com/Resource/Neighborhood/Arlington" ...
##  $ geometry    :sfc_POLYGON of length 310; first list element: List of 1
##   ..$ : num [1:16, 1:2] -73.8 -73.8 -73.9 -73.9 -73.9 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
##   ..- attr(*, "names")= chr [1:4] "neighborhood" "boroughCode" "borough" "X.id"
# Base layer of the combined map: average nightly price per neighbourhood.
ny_airbnb <- read.csv("NY2024.csv")

# Mean price per neighbourhood, as a two-column data frame.
feq <- aggregate(ny_airbnb$price, by = list(ny_airbnb$neighborhood), FUN = mean)
names(feq) <- c("neighborhood", "average_price")

# Left join onto the boundaries: every polygon is kept, and polygons with no
# matching neighbourhood in the listings get average_price = NA.
coor <- merge(coor, feq, by = "neighborhood", all.x = TRUE)

# Round 'average_price' to the nearest integer for display.
coor$average_price <- round(coor$average_price)

# Purple scale for the polygons; domain = NULL takes the range from the values
# the palette is called with.
color_scale <- colorNumeric(palette = "Purples", domain = NULL)

map <- leaflet(coor) %>%
  setView(lng = -74.00, lat = 40.71, zoom = 10) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addPolygons(
    fillColor = ~color_scale(average_price),
    weight = 1,
    color = "black",
    fillOpacity = 0.5,
    # Argument spelled out in full instead of relying on partial matching of
    # `highlight`. The hover style is one style shared by all polygons, so the
    # per-feature fill colour is left out: a hovered polygon keeps its own
    # price colour and only gets a thicker outline and a more opaque fill.
    highlightOptions = highlightOptions(
      weight = 3,
      fillOpacity = 0.8
    )
    #label = ~paste(neighborhood, ': '," $",average_price, "/ night")
  ) %>%
  addLegend(
    position = "bottomright",
    pal = color_scale,
    values = coor$average_price,
    title = "Average Price",
    opacity = 0.7
  )

# Creating a simple spatial object
# Point layer built from the listing coordinates (EPSG:4326 longitude/latitude).
NYData_sf <- st_as_sf(NYData, coords = c("longitude", "latitude"), crs = 4326)

# domain = NULL: the colour range is taken from the prices passed in.
colors_red <- colorNumeric(palette = "Reds", domain = NULL)

# Overlay the clustered listings on the existing choropleth `map`.
# Circle markers are used because the styling below (fill colour by price,
# radius, outline weight and colour) is circle/path styling; passed through
# markerOptions() to addMarkers() it had no visible effect on the icon markers.
map <- addCircleMarkers(
  map,
  data = NYData_sf,
  clusterOptions = markerClusterOptions(),
  group = "listings",
  fillColor = ~colors_red(price),
  fillOpacity = 0.75,
  weight = 2,
  color = "black",
  radius = 5,
  # HTML popup with the key facts of the clicked listing.
  popup = ~paste(
    "<strong>Price:</strong>", price, "<br>",
    "<strong>Room Type:</strong>", room_type, "<br>",
    "<strong>Minimum Nights:</strong>", minimum_nights, "<br>",
    "<strong>Bedrooms:</strong>", bedrooms, "<br>",
    "<strong>Beds:</strong>", beds, "<br>",
    "<strong>Baths:</strong>", baths
  )
) %>%
  addMiniMap(toggleDisplay = TRUE)
map